# Start from an empty workspace and a fixed seed so that the figures quoted in
# the trailing comments can be reproduced.
rm(list = ls(all = TRUE))
set.seed(610)
library(foreign)
library(openxlsx)

# 2014 list (the counts below treat each row as one candidate).
village2014 <- read.csv(
  "C:/Users/wearytolove/Dropbox/natsa2016/2015 and older/natsa2015 -young/list2014.csv",
  stringsAsFactors = FALSE
)

head(village2014)

# ---- Descriptive analysis (p. 9) ----

# KMT rows: count, then share of all rows in the 2014 list
sum(village2014$KMT)                      # 2633
sum(village2014$KMT) / nrow(village2014)  # 18.6%

# DPP rows: count, then share of all rows in the 2014 list
sum(village2014$DPP)                      # 718
sum(village2014$DPP) / nrow(village2014)  # 5.1%

# Incumbents: count, then divided by a hard-coded 7848
# NOTE(review): 7848 is presumably the number of villages/seats rather than
# the number of rows -- confirm against the data source.
sum(village2014$Incumbent)         # 6921
sum(village2014$Incumbent) / 7848  # 88.2%

# Incumbents who won (Incumbent == 1 and Win == 1), again over 7848
sum(village2014$Incumbent == 1 & village2014$Win == 1)         # 5518
sum(village2014$Incumbent == 1 & village2014$Win == 1) / 7848  # 70.2%



#================================================#
#==============H1:Supply Side (p.10)=============#
#================================================#

# 2014: under40 is 1 for candidates born after 1973, 0 otherwise
# (rows with a missing birth year stay 0).
village2014$under40 <- 0
village2014$under40[which(village2014$birthyear > 1973)] <- 1
sum(village2014$under40)                      # 730 young candidates
sum(village2014$under40) / nrow(village2014)  # 5.16%

# Village election result 2010: same flag, with the cut-off moved to 1969
village2010 <- read.csv(
  "C:/Users/wearytolove/Dropbox/natsa2016/2015 and older/natsa2015 -young/list2010.csv",
  stringsAsFactors = FALSE
)
village2010$under40 <- 0
village2010$under40[which(village2010$Birthyear > 1969)] <- 1
sum(village2010$under40)                      # 901 young candidates
sum(village2010$under40) / nrow(village2010)  # 5.8%

# By party: young candidates among DPP candidates, 2014 then 2010
sum(village2014$DPP == 1 & village2014$under40 == 1)  # 70 young and DPP candidates
sum(village2014$DPP == 1 & village2014$under40 == 1) /
  sum(village2014$DPP == 1)                           # 9.7%
sum(village2010$DPP == 1 & village2010$under40 == 1)  # 53 young and DPP candidates in 2010
sum(village2010$DPP == 1 & village2010$under40 == 1) /
  sum(village2010$DPP == 1)                           # 9.1%



#===============================================#
#================H2 Demand side=================#
#===============================================#

# Mean vote share: all 2014 candidates, then young candidates in each year
mean(village2014$voteshare)                            # 55.51%
mean(village2014$voteshare[village2014$under40 == 1])  # 42.2% young got in 2014
mean(village2010$Voteshare[village2010$under40 == 1])  # 42.8%

# Two-candidate races with exactly one incumbent, non-incumbent rows only:
# young challengers first, then older challengers.
mean(village2014$voteshare[
  village2014$NumCan == 2 &
    village2014$N_Inc_count == 1 &
    village2014$Incumbent == 0 &
    village2014$under40 == 1
])  # 43.4% young challengers got in 1-1 in 2014
mean(village2014$voteshare[
  village2014$NumCan == 2 &
    village2014$N_Inc_count == 1 &
    village2014$Incumbent == 0 &
    village2014$under40 == 0
])  # 44.5% old challengers got in 1-1 in 2014

# Win counts and win rates of young candidates
# (count, share of young candidates, share of all winners)

sum(village2014$under40 == 1 & village2014$Win == 1)  # 291 young won in 2014
sum(village2014$under40 == 1 & village2014$Win == 1) /
  sum(village2014$under40 == 1)                       # 39.9%
sum(village2014$under40 == 1 & village2014$Win == 1) /
  sum(village2014$Win == 1)                           # 3.7% of all winners

sum(village2010$under40 == 1 & village2010$Win == 1)  # 388 young won in 2010
sum(village2010$under40 == 1 & village2010$Win == 1) /
  sum(village2010$under40 == 1)                       # 43.1%
sum(village2010$under40 == 1 & village2010$Win == 1) /
  sum(village2010$Win == 1)                           # 5.0% of all winners


# DPP candidates only: mean vote share of the young
mean(village2014$voteshare[village2014$DPP == 1 & village2014$under40 == 1])  # 43.6% young got in 2014
mean(village2010$Voteshare[village2010$DPP == 1 & village2010$under40 == 1])  # 41.8%

# DPP candidates only: young win rate, then young share of DPP winners
sum(village2014$DPP == 1 & village2014$under40 == 1 & village2014$Win == 1) /
  sum(village2014$DPP == 1 & village2014$under40 == 1)  # 47.1%
sum(village2014$DPP == 1 & village2014$under40 == 1 & village2014$Win == 1) /
  sum(village2014$DPP == 1 & village2014$Win == 1)      # 8.4% of all winners

sum(village2010$DPP == 1 & village2010$under40 == 1 & village2010$Win == 1) /
  sum(village2010$DPP == 1 & village2010$under40 == 1)  # 43.4%
sum(village2010$DPP == 1 & village2010$under40 == 1 & village2010$Win == 1) /
  sum(village2010$DPP == 1 & village2010$Win == 1)      # 8.5% of all winners


# Young DPP challengers (Incumbent == 0): win rate in 2014, then in 2010
sum(village2014$DPP == 1 & village2014$under40 == 1 &
      village2014$Incumbent == 0 & village2014$Win == 1) /
  sum(village2014$DPP == 1 & village2014$under40 == 1 &
        village2014$Incumbent == 0)
sum(village2010$DPP == 1 & village2010$under40 == 1 &
      village2010$Incumbent == 0 & village2010$Win == 1) /
  sum(village2010$DPP == 1 & village2010$under40 == 1 &
        village2010$Incumbent == 0)


#==========================================================#
#===========H2-2 Turnout===================================#
#==========================================================#

# Combined 2010 + 2014 dataset
combine1014 <- read.csv(
  "C:/Users/wearytolove/Dropbox/natsa2016/2015 and older/natsa2015 -young/test_li.csv",
  stringsAsFactors = FALSE
)
head(combine1014)

# Turnout rate by condition (Figure 1): for each condition, the mean of
# Turnout followed by the number of rows that satisfy the condition.

# One candidate
mean(combine1014$Turnout[combine1014$NumCan == 1])  # One candidate 67% Turnout rate
sum(combine1014$NumCan == 1)                        # 2994 villages

# Two candidates
mean(combine1014$Turnout[combine1014$NumCan == 2])  # Two candidates 72.6% tr
sum(combine1014$NumCan == 2)                        # 3705 villages

# Two candidates: one old vs. one young
mean(combine1014$Turnout[combine1014$NumYoung == 1 & combine1014$NumCan == 2])  # 70.8% tr
sum(combine1014$NumYoung == 1 & combine1014$NumCan == 2)                        # 330 villages

# Three candidates
mean(combine1014$Turnout[combine1014$NumCan == 3])  # 72.05% tr
sum(combine1014$NumCan == 3)                        # 918 villages

# Three candidates: one young, two old
mean(combine1014$Turnout[combine1014$NumYoung == 1 & combine1014$NumCan == 3])  # 70.08% tr
sum(combine1014$NumYoung == 1 & combine1014$NumCan == 3)                        # 224 villages

#======================================================#
#=========Matching for Turnout Rate P.14===============#
#======================================================#

# Install MatchIt only when it is missing; an unconditional install.packages()
# re-downloads the package (and needs network access) on every run.
if (!requireNamespace("MatchIt", quietly = TRUE)) {
  install.packages("MatchIt")
}
library(MatchIt)

# From 1 to 2 candidates
data2 <- combine1014[combine1014$NumCan <= 2, ]
# tr1 = 1 for single-candidate rows, 0 for two-candidate rows
# (equivalent to the original two-step 1 - (NumCan - 1)).
data2$tr1 <- 2 - data2$NumCan

# 1:1 nearest-neighbour matching on comp2010, NumInc and Electorates
m.out_INQ1 <- matchit(tr1 ~ comp2010 + NumInc + Electorates,
                      data = data2, method = "nearest", ratio = 1)
summary(m.out_INQ1)
plot(m.out_INQ1, type = "jitter")
plot(m.out_INQ1, type = "hist")

# Turnout in the matched sample, by group
m.data1 <- match.data(m.out_INQ1)
summary(m.data1$Turnout[m.data1$tr1 == 0])
summary(m.data1$Turnout[m.data1$tr1 == 1])
t.test(Turnout ~ tr1, data = m.data1)
#0.7287981       0.6662062
# p-value < 2.2e-16

# Difference in mean turnout: tr1 == 1 minus tr1 == 0
mean(m.data1$Turnout[m.data1$tr1 == 1]) - mean(m.data1$Turnout[m.data1$tr1 == 0])

#====Comparing adding one more young to single candidate
# Control: single-candidate rows. Treated: two-candidate rows with NumYoung == 1.
data2 <- rbind(
  combine1014[combine1014$NumCan == 1, ],
  combine1014[combine1014$NumCan == 2 & combine1014$NumYoung == 1, ]
)
data2$tr1 <- data2$NumCan - 1
data2 <- na.omit(data2)

# 1:1 nearest-neighbour matching on comp2010, NumInc and Electorates
m.out_INQ2 <- matchit(tr1 ~ comp2010 + NumInc + Electorates,
                      data = data2, method = "nearest", ratio = 1)
summary(m.out_INQ2)
# Diagnostics for the model fitted just above (the original plotted
# m.out_INQ1, i.e. a different matching run).
plot(m.out_INQ2, type = "jitter")
plot(m.out_INQ2, type = "hist")

# Turnout in the matched sample, by group
m.data2 <- match.data(m.out_INQ2)
summary(m.data2$Turnout[m.data2$tr1 == 0])
summary(m.data2$Turnout[m.data2$tr1 == 1])
t.test(Turnout ~ tr1, data = m.data2)
#0.6633821       0.7081027
#p-value = 3.897e-13

# Difference in mean turnout: treated minus control
mean(m.data2$Turnout[m.data2$tr1 == 1]) - mean(m.data2$Turnout[m.data2$tr1 == 0])


#===Comparing adding one more to competition district
# Control: two-candidate rows. Treated: three-candidate rows.
data2 <- rbind(
  combine1014[combine1014$NumCan == 2, ],
  combine1014[combine1014$NumCan == 3, ]
)
data2$tr1 <- data2$NumCan - 2
data2 <- na.omit(data2)

# 1:1 nearest-neighbour matching on comp2010, NumInc and Electorates
m.out_INQ2 <- matchit(tr1 ~ comp2010 + NumInc + Electorates,
                      data = data2, method = "nearest", ratio = 1)
summary(m.out_INQ2)
# Diagnostics for the model fitted just above (the original plotted
# m.out_INQ1, i.e. a different matching run).
plot(m.out_INQ2, type = "jitter")
plot(m.out_INQ2, type = "hist")

# Turnout in the matched sample, by group
m.data2 <- match.data(m.out_INQ2)
summary(m.data2$Turnout[m.data2$tr1 == 0])
summary(m.data2$Turnout[m.data2$tr1 == 1])
t.test(Turnout ~ tr1, data = m.data2)

# Difference in mean turnout: treated minus control
mean(m.data2$Turnout[m.data2$tr1 == 1]) - mean(m.data2$Turnout[m.data2$tr1 == 0])

#===Comparing adding one more young to competition district
# Control: two-candidate rows. Treated: three-candidate rows with NumYoung == 1.
data2 <- rbind(
  combine1014[combine1014$NumCan == 2, ],
  combine1014[combine1014$NumCan == 3 & combine1014$NumYoung == 1, ]
)
data2$tr1 <- data2$NumCan - 2
data2 <- na.omit(data2)

# 1:1 nearest-neighbour matching on comp2010, NumInc and Electorates
m.out_INQ2 <- matchit(tr1 ~ comp2010 + NumInc + Electorates,
                      data = data2, method = "nearest", ratio = 1)
summary(m.out_INQ2)
# Diagnostics for the model fitted just above (the original plotted
# m.out_INQ1, i.e. a different matching run).
plot(m.out_INQ2, type = "jitter")
plot(m.out_INQ2, type = "hist")

# Turnout in the matched sample, by group
m.data2 <- match.data(m.out_INQ2)
summary(m.data2$Turnout[m.data2$tr1 == 0])
summary(m.data2$Turnout[m.data2$tr1 == 1])
t.test(Turnout ~ tr1, data = m.data2)

# Difference in mean turnout: treated minus control
mean(m.data2$Turnout[m.data2$tr1 == 1]) - mean(m.data2$Turnout[m.data2$tr1 == 0])

#================================================#
#===========Influence the incumbent H3===========#
#================================================#

# Data combined 2010 and 2014 with same challenger and incumbent's name
data <- read.csv(
  "C:/Users/wearytolove/Dropbox/natsa2016/2015 and older/natsa2015 -young/clear_treatmentVR.csv",
  stringsAsFactors = FALSE
)
head(data)
table(data$N_Reele)  # Number of same candidates in district

# Keep only the rows that have a treatment/control code
data3 <- data[!is.na(data$T_C), ]  # Delete other district
data3$T_C <- 1 - data3$T_C  # 1= no young candidate join, 0 = 1 young candidate join

# Incumbent's voteshare: take Inc_3_2010 where it is present, otherwise fall
# back to Inc_2_2010 (NA when both are missing). This is the vectorized form
# of the original row-by-row loop, in which the 3-candidate value overwrote
# the 2-candidate one.
data3$IncVR <- data3$Inc_2_2010
has_inc_3 <- !is.na(data3$Inc_3_2010)
data3$IncVR[has_inc_3] <- data3$Inc_3_2010[has_inc_3]

# Main opponent's voteshare, built the same way from Opp_3_2010 / Opp_2_2010
data3$OppVR <- data3$Opp_2_2010
has_opp_3 <- !is.na(data3$Opp_3_2010)
data3$OppVR[has_opp_3] <- data3$Opp_3_2010[has_opp_3]
# Match the districts with and without one young candidate joining in 2014.
# data4 columns: X1 = T_C, X2 = comp2010, X3 = Electorates.
data4 <- data.frame(cbind(data3$T_C, data3$comp2010, data3$Electorates))
m.out_INQ1 <- matchit(X1 ~ X2 + X3, data = data4, method = "nearest", ratio = 1)
m.data1 <- match.data(m.out_INQ1)

# Flag every matched row. data4 was built with cbind(), so its row names are
# 1..n and index data3 by position. Using all rows of m.data1 replaces the
# original hard-coded loop over the first 98 rows.
data3$matchuse <- 0
data3$matchuse[as.numeric(row.names(m.data1))] <- 1

library(pastecs)
# The original called stat.desc(mydata) here, but `mydata` is never defined in
# this script and the call aborts source(); it is disabled.
# stat.desc(mydata)

# summary() handles NAs itself; the na.rm argument the original passed to it
# was ignored and has been dropped.

# Matched rows, *_in_2 columns
summary(data3$Inc_in_2[data3$matchuse == 1])  # Inc's voteshare in re-competition
summary(data3$Opp_in_2[data3$matchuse == 1])  # Opp's voteshare in re-competition
stat.desc(data3$Inc_in_2[data3$matchuse == 1] - data3$Opp_in_2[data3$matchuse == 1])  # Difference

# Matched rows, *_2_2010 columns
summary(data3$Inc_2_2010[data3$matchuse == 1])
summary(data3$Opp_2_2010[data3$matchuse == 1])
stat.desc(data3$Inc_2_2010[data3$matchuse == 1] - data3$Opp_2_2010[data3$matchuse == 1])


# Matched rows, *_in_3 columns (including the young candidate)
summary(data3$Inc_in_3[data3$matchuse == 1])
summary(data3$Opp_in_3[data3$matchuse == 1])
summary(data3$Young_in_3[data3$matchuse == 1])
stat.desc(data3$Inc_in_3[data3$matchuse == 1] - data3$Opp_in_3[data3$matchuse == 1])


# Matched rows, *_3_2010 columns
summary(data3$Inc_3_2010[data3$matchuse == 1])
summary(data3$Opp_3_2010[data3$matchuse == 1])
stat.desc(data3$Inc_3_2010[data3$matchuse == 1] - data3$Opp_3_2010[data3$matchuse == 1])

# Figure data: mean and standard error of the incumbent-minus-opponent gap for
# each group/year cell, computed on the matched rows only.
in_match <- data3$matchuse == 1
gap_by_cell <- list(
  data3$Inc_2_2010[in_match] - data3$Opp_2_2010[in_match],  # control, 2010
  data3$Inc_in_2[in_match] - data3$Opp_in_2[in_match],      # control, 2014
  data3$Inc_3_2010[in_match] - data3$Opp_3_2010[in_match],  # treatment, 2010
  data3$Inc_in_3[in_match] - data3$Opp_in_3[in_match]       # treatment, 2014
)
gap_stats <- lapply(gap_by_cell, stat.desc)

# Pull the statistics by name; the original used positions 9 and 10 of the
# stat.desc() result, which are "mean" and "SE.mean" under its defaults.
figure1.data <- data.frame(
  group = c("control", "control", "treatment", "treatment"),
  year = c(2010, 2014, 2010, 2014),
  mean = vapply(gap_stats, function(s) s[["mean"]], numeric(1)),
  str = vapply(gap_stats, function(s) s[["SE.mean"]], numeric(1))
)

head(figure1.data)

library(ggplot2)
# Points and lines per group, with error bars at mean +/- 1.96 * SE.
# Breaks and limits are set on one x scale: adding xlim() after
# scale_x_continuous() replaces that scale and discards its breaks.
ggplot(figure1.data, aes(x=year, y=mean, colour=group)) +
  geom_point(position=position_dodge(.6),size=5) +
  geom_line(position=position_dodge(.6),size=1.3)+
  geom_errorbar(aes(ymin=mean-1.96*str, ymax=mean+1.96*str),width=1,position=position_dodge(.6),size=1.3)+
  scale_x_continuous(breaks=c(2010,2014),limits=c(2008,2016))+
  labs(title="",
       x="Election Year",
       y="Voteshare Diff. btw Incumbent and Challenger") +
  theme(legend.title=element_blank(),
        plot.title=element_text(size=18),
        axis.title.y = element_text(size = 14, face = "bold"),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.text = element_text(size=16,colour="black"),
        legend.text=element_text(size = 12, face = "bold"))+
  scale_colour_discrete(
    breaks=c("control","treatment"),
    labels=c(" Inc and Chal compete twice ", " One more Young joined in 2014"))

#==============Percentage of winning===============#
# Fresh workspace and seed for the win-probability analysis. rm() does not
# detach packages, but MatchIt is loaded again here so that this section also
# runs in a new session.
rm(list = ls(all = TRUE))
set.seed(610)
library(MatchIt)

data <- read.csv(
  "C:/Users/wearytolove/Dropbox/natsa2016/2015 and older/natsa2015 -young/clear_treatmentWIN.csv",
  stringsAsFactors = FALSE
)


# Keep rows with a treatment/control code and flip the coding of T_C
data3 <- data[!is.na(data$T_C), ]
data3$T_C <- 1 - data3$T_C

# 1:1 nearest-neighbour matching; X1 = T_C, X2 = comp2010, X3 = Electorates
data4 <- data.frame(cbind(data3$T_C, data3$comp2010, data3$Electorates))
m.out_INQ1 <- matchit(X1 ~ X2 + X3, data = data4, method = "nearest", ratio = 1)
m.data1 <- match.data(m.out_INQ1)

# Flag every matched row. data4 was built with cbind(), so its row names are
# 1..n and index data3 by position. Using all rows of m.data1 replaces the
# original hard-coded loop over the first 98 rows.
data3$matchuse <- 0
data3$matchuse[as.numeric(row.names(m.data1))] <- 1

library(pastecs)

# summary() handles NAs itself; the na.rm argument the original passed to it
# was ignored and has been dropped.

# Matched rows, *_in_2 columns
summary(data3$Inc_in_2[data3$matchuse == 1])
summary(data3$Opp_in_2[data3$matchuse == 1])
stat.desc(data3$Inc_in_2[data3$matchuse == 1])

# Matched rows, *_2_2010 columns
summary(data3$Inc_2_2010[data3$matchuse == 1])
summary(data3$Opp_2_2010[data3$matchuse == 1])
stat.desc(data3$Inc_2_2010[data3$matchuse == 1] - data3$Opp_2_2010[data3$matchuse == 1])


# Matched rows, *_in_3 columns (including the young candidate)
stat.desc(data3$Inc_in_3[data3$matchuse == 1])
summary(data3$Opp_in_3[data3$matchuse == 1])
summary(data3$Young_in_3[data3$matchuse == 1])
stat.desc(data3$Inc_in_3[data3$matchuse == 1] - data3$Opp_in_3[data3$matchuse == 1])


# Matched rows, *_3_2010 columns
summary(data3$Inc_3_2010[data3$matchuse == 1])
summary(data3$Opp_3_2010[data3$matchuse == 1])
stat.desc(data3$Inc_3_2010[data3$matchuse == 1] - data3$Opp_3_2010[data3$matchuse == 1])

# Figure data: mean and standard error of the incumbent column for each
# group/year cell, computed on the matched rows only.
in_match <- data3$matchuse == 1
inc_by_cell <- list(
  data3$Inc_2_2010[in_match],  # control, 2010
  data3$Inc_in_2[in_match],    # control, 2014
  data3$Inc_3_2010[in_match],  # treatment, 2010
  data3$Inc_in_3[in_match]     # treatment, 2014
)
inc_stats <- lapply(inc_by_cell, stat.desc)

# Pull the statistics by name; the original used positions 9 and 10 of the
# stat.desc() result, which are "mean" and "SE.mean" under its defaults.
figure2.data <- data.frame(
  group = c("control", "control", "treatment", "treatment"),
  year = c(2010, 2014, 2010, 2014),
  mean = vapply(inc_stats, function(s) s[["mean"]], numeric(1)),
  str = vapply(inc_stats, function(s) s[["SE.mean"]], numeric(1))
)

head(figure2.data)

# ggplot2 is attached by an earlier section; loading it again here lets this
# section run in a new session as well.
library(ggplot2)
# Points and lines per group, with error bars at mean +/- 1.96 * SE.
# Breaks and limits are set on one x scale: adding xlim() after
# scale_x_continuous() replaces that scale and discards its breaks.
ggplot(figure2.data, aes(x=year, y=mean, colour=group)) +
  geom_point(position=position_dodge(.6),size=5) +
  geom_line(position=position_dodge(.6),size=1.3)+
  geom_errorbar(aes(ymin=mean-1.96*str, ymax=mean+1.96*str),width=1,position=position_dodge(.6),size=1.3)+
  scale_x_continuous(breaks=c(2010,2014),limits=c(2008,2016))+
  labs(title="",
       x="Election Year",
       y="Probability of Incumbent being reelected") +
  theme(legend.title=element_blank(),
        plot.title=element_text(size=18),
        axis.title.y = element_text(size = 14, face = "bold"),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.text = element_text(size=16,colour="black"),
        legend.text=element_text(size = 12, face = "bold"))+
  scale_colour_discrete(
    breaks=c("control","treatment"),
    labels=c(" Inc and Chal compete twice ", " One more Young joined in 2014"))
